---
title: "Power, Proximity, Parity"
output: pdf_document
---

```{r}
library(fmsb)
library(miceadds)
library(foreign)
library(readstata13)
library(dplyr)
library(geosphere)
library(ggplot2)
#library(effects)
library(readxl)
library(tidyr)
library(lubridate)
library(sf)
library(sjlabelled)
library(ggeffects)
library(patchwork)
library(numbers)
library(rlist)
library(extrafont)
library(cleaner)
library(parallel)
library(Rlab)
library(texreg)
library(tictoc)
library(scales)
library(tidyverse)
library(dplyr)
library(caret)
library(pROC)
set.seed(1)
```


```{r}
library(haven)
Gibler_data <- read_dta("../Data/Gibler_data.dta")

Gibler_data$spline1 <- Gibler_data$`_spline1`
Gibler_data$spline2 <- Gibler_data$`_spline2`
Gibler_data$spline3 <- Gibler_data$`_spline3`

```


```{r}
 library(tidyverse) # load this first for most/all things
 library(peacesciencer) # the package of interest
library(stevemisc) # a dependency, but also used for standardizing variables for better interpretation
library(tictoc)

tic()
create_dyadyears(directed = FALSE, mry = FALSE) %>%
  add_gml_mids(keep = NULL) %>%
  add_contiguity()%>%
  add_peace_years() %>%
  add_strategic_rivalries() %>%
 add_cow_alliance() %>%
  add_nmc() %>%
  add_democracy() -> Data


Data$landcontig <- ifelse(Data$conttype == 1, 1, 0)
Data$joint_dem <- ifelse(Data$polity21 > 5 & Data$polity22 > 5 , 1, 0)
Data$cincprop <- ifelse(Data$cinc1 < Data$cinc2, (Data$cinc2/(Data$cinc2 + Data$cinc1)), (Data$cinc1/(Data$cinc2 + Data$cinc1)))
Data$cow_alliance <- ifelse(Data$cow_defense == 1 | Data$cow_entente == 1 | Data$cow_neutral == 1 | Data$cow_nonagg == 1, 1, 0)


#only include up to 2000

Data <- subset(Data, Data$year < 2001)


```


```{r import-data}
compare <- readstata13::read.dta13("../Data/location_05182011.dta")
nmc <- read.csv("../Data/NMC_5_0/NMC_5_0.csv")
mids_participants <- read.csv("../Data/MID_Level_4.3/MIDB 4.3.csv")
dyad_mids <- read.dta13("../Data/DYDMID3.1/dyadic mid 3.1_may 2018.dta")
dyad_mids <- subset(dyad_mids, dyad_mids$strtyr == year)
#dyad_mids <- subset(dyad_mids, dyad_mids$highact > 12)
dyad_mids <- subset(dyad_mids, dyad_mids$year < 2001)
locations <- read.csv("../Data/MIDLOC_2.1/MIDLOCA_2.1.csv")
caploc <- read.csv("../Data/latlong.csv", header=FALSE)
names(caploc) <- c("ccode", "country", "lat", "lat2", "long", "long2")



library(tidyverse)
library(stevemisc)


```





```{r no-one-wants-minutes}
caploc$caplat <- caploc$lat + caploc$lat2/60
caploc$caplong <- caploc$long + caploc$long2 / 60
caploc <- caploc %>% distinct(ccode, .keep_all = TRUE)
```

```{r generate-negatives}
library(dplyr)
States <- read.csv("../Data/states2016.csv")
States %>% 
  dplyr::select(ccode, styear, endyear) %>% 
  expand(ccode1=ccode, ccode2=ccode, year=seq(1816,2000)) %>% 
  dplyr::filter(ccode1!=ccode2) %>% 
  dplyr::left_join(., States, by=c("ccode1"="ccode")) %>%
  dplyr::filter(year >= styear & year <= endyear) %>%
  dplyr::select(-styear,-endyear) %>% 
  dplyr::left_join(., States, by=c("ccode2"="ccode")) %>%
  dplyr::filter(year >= styear & year <= endyear) %>%
  dplyr::filter(ccode1 < ccode2) %>% 
  dplyr::select(ccode1, ccode2, year) -> DY





lat <- runif(length(DY$ccode1), min = -90, max = 90)
long <- runif(length(DY$ccode1), min = -180, max = 180)
neg <- cbind(DY, lat, long)

lat <- runif(1000000-length(DY$ccode1), min = -90, max = 90)
long <- runif(1000000-length(DY$ccode1), min = -180, max = 180)
sample_list <- sample_n(DY, 1000000-length(DY$ccode1))
neg2 <- cbind(sample_list, lat, long)

neg <- rbind(neg,neg2)


```



```{r merge-locations}
mid_loc <- left_join(dyad_mids, locations, by = c("disno" = "dispnum"))
mid_loc$year <- mid_loc$year.x

mid_loc <- subset(mid_loc, !is.na(mid_loc$midloc2_xlongitude))
mid_loc$lat.x <- mid_loc$midloc2_ylatitude
mid_loc$long.x <- mid_loc$midloc2_xlongitude



#translate to ccode1 and 2

mid_loc$ccode1 <- ifelse(mid_loc$statea < mid_loc$stateb, mid_loc$statea, mid_loc$stateb)
mid_loc$ccode2 <- ifelse(mid_loc$statea < mid_loc$stateb, mid_loc$stateb, mid_loc$statea)

```

```{r add-capitals}
add_cap <- function(mid_loc, caploc)
{
  mid_loc <- merge(mid_loc, caploc, by.x = "ccode1", by.y = "ccode", all.x = TRUE)
  mid_loc <- merge(mid_loc, caploc, by.x = "ccode2", by.y = "ccode", suffixes = c("1", "2"), all.x = TRUE)
}

mid_loc <- add_cap(mid_loc, caploc)
neg <- add_cap(neg, caploc)
```




```{r reduce-vars}
reduce_data_frame <- function(frame)
{
  frame <- data.frame(year = frame$year, ccode1 = frame$ccode1, ccode2 = frame$ccode2, caplong1 = frame$caplong1, caplat1 = frame$caplat1, caplong2 = frame$caplong2, caplat2 = frame$caplat2, mid_lat = frame$lat.x, mid_long = frame$long.x)
  return(frame)
}

dat_pos <- reduce_data_frame(mid_loc)
dat_neg <- reduce_data_frame(neg)


onsetinit <- function(pos_frame, neg_frame)
{
  pos_frame$onsetdum <- 1
 
  
  neg_frame$onsetdum <- 0

  
  frame <- rbind(pos_frame, neg_frame)
  return(frame)
}

dat <- onsetinit(dat_pos, dat_neg)

```

```{r calc-vars}
calc_vars <- function(dat)
{
  dat$caplong1 <- round(dat$caplong1/.5)*.5
  dat$caplong2 <- round(dat$caplong2/.5)*.5
  dat$caplat1 <- round(dat$caplat1/.5)*.5
  dat$caplat2 <- round(dat$caplat2/.5)*.5
  
  dat$mid_long <- round(dat$mid_long/.5)*.5
  dat$mid_lat <- round(dat$mid_lat/.5)*.5
  
  cap_coord1 <- as.matrix(data.frame(dat$caplong1, dat$caplat1))
  cap_coord2 <- as.matrix(data.frame(dat$caplong2, dat$caplat2))
  mid_coord <- as.matrix(data.frame(dat$mid_long, dat$mid_lat))

  dat$distance1 <- distHaversine(mid_coord, cap_coord1) / 1000 / 100
  dat$distance2 <- distHaversine(mid_coord, cap_coord2) / 1000 / 100
  dat$distance1 <- ifelse(dat$distance1 == 0, 1, dat$distance1)
  dat$distance2 <- ifelse(dat$distance2 == 0, 1, dat$distance2)
  dat$distance <- distHaversine(cap_coord1, cap_coord2) / 1000 / 100
  


  #dat$dyad <- ifelse(dat$statea > dat$stateb, paste0(dat$statea, dat$stateb), paste0(dat$stateb, dat$statea))

  dat$mid_lat <- round(dat$mid_lat/0.5)*0.5
  dat$mid_long <- round(dat$mid_long/0.5)*0.5

  return(dat)
}

dat <- calc_vars(dat)

```



Sampling weights:

```{r}
nyear <- max(dat$year) - min(dat$year) + 1

#all dyads per year, x number of PRIO grid squares
poss <- 170547638400

formatC(poss, format="e")
```
MIDs: 6042

```{r}
per_actsual <- length(dyad_mids$disno)/poss
per_sample <- length(subset(dat$onsetdum, dat$onsetdum == 1)) / length(dat$onsetdum)	

weight_pos <- per_actsual / per_sample	
weight_neg <- (1 - per_actsual) / (1 - per_sample)	


dat$weight[dat$onsetdum == 1] <- weight_pos
dat$weight[dat$onsetdum == 0] <- weight_neg





############################################


#######merge other Gibler variabels

dat$DyadID <- paste(ifelse(dat$ccode1 < dat$ccode2, dat$ccode1, dat$ccode2),"_",ifelse(dat$ccode1 < dat$ccode2, dat$ccode2, dat$ccode1),"_",dat$year)

Data$DyadID <- paste(ifelse(Data$ccode1  < Data$ccode2, Data$ccode1, Data$ccode2),"_",ifelse(Data$ccode1 < Data$ccode2, Data$ccode2, Data$ccode1),"_",Data$year)

Gibler_data$DyadID <- paste(ifelse(Gibler_data$ccode1  < Gibler_data$ccode2, Gibler_data$ccode1, Gibler_data$ccode2),"_",ifelse(Gibler_data$ccode1 < Gibler_data$ccode2, Gibler_data$ccode2, Gibler_data$ccode1),"_",Gibler_data$year)

Gibler_data1 <- Gibler_data[c("DyadID","firstparity","spline1","spline2","spline3","allied","cwpceyrs_bkt","jointdem","riv1","small_island","outlier","westernhemi","europe","africa","mideast","asia","oceania","sameentry","year")]
      

dat1 <- dat[c("DyadID","onsetdum","distance1","distance2","mid_lat","mid_long","ccode1","ccode2")]

Data1 <- Data[c("DyadID","cinc1","cinc2","joint_dem","gmlmidspell","landcontig","ongoingrivalry")]


dat2 <- merge(dat1,Data1, by = "DyadID", all.x = T )
dat2 <- merge(dat2,Gibler_data1, by = "DyadID", all.x = T)


dat2$caprat <- (dat2$cinc1/(dat2$cinc1+dat2$cinc2))

dat2$cincprop <- ifelse(dat2$caprat < .5,(1-dat2$caprat),dat2$caprat)


dat2$effcaprat  <- (dat2$cinc1/dat2$distance1)/((dat2$cinc1/dat2$distance1)+(dat2$cinc2/dat2$distance2))

dat2$Eff_cinc_rat <- ifelse(dat2$effcaprat < .5,(1-dat2$effcaprat),dat2$effcaprat)

dat2$dyad <- dat2$ccode1 + (dat2$ccode2/1000)


```






```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE}

####This code can be used to calculate closest-border-to-dispute distance. It takes a very long time to run (~10 hours), however, so the data is provided below. 



dat_w_border_distance <- dat2
dat_w_border_distance <- dat_w_border_distance[c(1,5,6,7,8,32)]
####Including closest border distance

library(cshapes)
library(geosphere)
library(ggplot2)
library(magrittr)
library(sf)
library(sp)
library(maptools)


#download CShapes2
#CSHAPES2 <- cshp(date = as.Date("2010", "%Y"), useGW = FALSE, dependencies = FALSE)
CSHAPES2 <- cshp(date = NA, useGW = FALSE, dependencies = FALSE)


#####reduce polygon complexity
CSHAPES2 <- rmapshaper::ms_simplify(CSHAPES2, keep_shapes=T, keep=0.01,
                                   method = "dp",  snap=T)


library(dplyr)








#####FIRST, WE ARE FIGURING OUT THE APPROPRIATE POLYGON FOR EACH STATE GIVEN THE DATE. HERE, WE ARE MTACHING IT TO THE INDIVIDUAL FID's USED BY CSHAPES. THIS WILL LATER TELL US WHICH POLYGON TO CALL
dat_w_border_distance$geometry_FID_state1 <- NA
dat_w_border_distance$potential_FIDs_1 <- NA
dat_w_border_distance$Matched_COW_1 <- NA

for (i in 1:
       nrow(dat_w_border_distance)
     #1
     ){
  CSHAPES2_filtered <- CSHAPES2
  CSHAPES2_filtered$after <- NA
   CSHAPES2_filtered$before <- NA
    CSHAPES2_filtered$COWcode <- NA
    CSHAPES2_filtered$after <- CSHAPES2_filtered$start <= as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d")
    CSHAPES2_filtered$before <- CSHAPES2_filtered$end >= as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d")
    CSHAPES2_filtered$COWcode <- CSHAPES2_filtered$cowcode == dat_w_border_distance$ccode1[i]
    CSHAPES2_filtered <- subset(CSHAPES2_filtered,CSHAPES2_filtered$after == T & CSHAPES2_filtered$before == T  & CSHAPES2_filtered$COWcode == T)
   
   # dat_w_border_distance$geometry_stateA[i] <- CSHAPES2_filtered$geometry[1]
    dat_w_border_distance$geometry_FID_state1[i] <- CSHAPES2_filtered$fid[1]
   dat_w_border_distance$potential_FIDs_1[i] <- nrow(CSHAPES2_filtered)
   dat_w_border_distance$Matched_COW_1[i] <- CSHAPES2_filtered$cowcode[1]
   
}


dat_w_border_distance$geometry_FID_state2 <- NA
dat_w_border_distance$potential_FIDs_2 <- NA
dat_w_border_distance$Matched_COW_2 <- NA
for (i in 1:nrow(dat_w_border_distance)) {
  CSHAPES2_filtered <- CSHAPES2
  CSHAPES2_filtered$after <- NA
   CSHAPES2_filtered$before <- NA
    CSHAPES2_filtered$COWcode <- NA
    CSHAPES2_filtered$after <- CSHAPES2_filtered$start <= as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d")
    CSHAPES2_filtered$before <- CSHAPES2_filtered$end >= as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d")
    CSHAPES2_filtered$COWcode <- CSHAPES2_filtered$cowcode == dat_w_border_distance$ccode2[i]
    CSHAPES2_filtered <- subset(CSHAPES2_filtered,CSHAPES2_filtered$after == T & CSHAPES2_filtered$before == T  & CSHAPES2_filtered$COWcode == T)
   
    #dat_w_border_distance$geometry_stateB[i] <- CSHAPES2_filtered$geometry[1]
    dat_w_border_distance$geometry_FID_state2[i] <- CSHAPES2_filtered$fid[1]
     dat_w_border_distance$potential_FIDs_2[i] <- nrow(CSHAPES2_filtered)
    dat_w_border_distance$Matched_COW_2[i] <- CSHAPES2_filtered$cowcode[1]
}

###OK, NOW WE HAVE THE APPROPRIATE FID's FOR STATE A AND B FOR EACH DYAD. NOW WE NEED TO CALL THE APPROPRIATE ONES AND CALCULATE DISTANCE


####DROP NA's
dat_w_border_distance <- subset(dat_w_border_distance,!is.na(dat_w_border_distance$geometry_FID_state1))
dat_w_border_distance <- subset(dat_w_border_distance,!is.na(dat_w_border_distance$geometry_FID_state2))



dat_w_border_distance$state_1_dist_from_border <- NA
dat_w_border_distance$state_1_closest_lon <- NA
dat_w_border_distance$state_1_closest_lat <- NA
dat_w_border_distance$state_1_cap_lon <- NA
dat_w_border_distance$state_1_cap_lat <- NA

for (i in #4:5
     1:nrow(dat_w_border_distance)
     ){
   
    STATE_1 <- CSHAPES2[CSHAPES2$fid == dat_w_border_distance$geometry_FID_state1[i],]
    
    STATE_1 <- subset(STATE_1,!is.na(STATE_1$caplong))
   # pts <- dat_w_border_distance[c("mid_lat","mid_long")]
   # data.frame(list(dat_w_border_distance$mid_lat[i],dat_w_border_distance$mid_long[i]))
    
    
  pts <- data.frame(t(sapply(list(dat_w_border_distance$mid_long[i],dat_w_border_distance$mid_lat[i]),c)))
spdf <- as_Spatial(STATE_1)
wrld_subset <-spdf
dist.mat <- geosphere::dist2Line(p = pts, line = wrld_subset)


pts.wit.dist <- cbind(pts, dist.mat)
DF <- as.data.frame(pts.wit.dist )
DF <- DF[c(1:5)]



pts.sp <- sp::SpatialPoints(coords      = pts[,c("X1","X2")], # order matters
                            proj4string = wrld_subset@proj4string)


####only keep those OUTSIDE borders
pts.spdf <-as(pts.sp,"SpatialPointsDataFrame")
pts.spdf <- pts.spdf[wrld_subset,]  #only those inside dataframe NEED to figure out how to do opposite!
DF_Inside <- as.data.frame(pts.spdf)
DF <- merge(DF,DF_Inside,by = c("X1","X2"),all = T)
DF$distance_from_closest_border <- ifelse(is.na(DF$dummy),DF$distance,0)

dat_w_border_distance$state_1_dist_from_border[i] <- DF$distance_from_closest_border[1]
dat_w_border_distance$state_1_closest_lon[i] <- DF$lon[1]
dat_w_border_distance$state_1_closest_lat[i] <- DF$lat[1]
dat_w_border_distance$state_1_cap_lon[i] <- STATE_1$caplong[1]
dat_w_border_distance$state_1_cap_lat[i] <- STATE_1$caplat[1]

}
#convert metters to KM
dat_w_border_distance$state_1_dist_from_border <- dat_w_border_distance$state_1_dist_from_border/1000/100




dat_w_border_distance$state_2_dist_from_border <- NA
dat_w_border_distance$state_2_closest_lon <- NA
dat_w_border_distance$state_2_closest_lat <- NA
dat_w_border_distance$state_2_cap_lon <- NA
dat_w_border_distance$state_2_cap_lat <- NA
for (i in 1:nrow(dat_w_border_distance)
     ) {
    STATE_2 <- CSHAPES2[CSHAPES2$fid == dat_w_border_distance$geometry_FID_state2[i],]
    STATE_2 <- subset(STATE_2,!is.na(STATE_2$caplong))
   # pts <- dat_w_border_distance[c("mid_lat","mid_long")]
   # data.frame(list(dat_w_border_distance$mid_lat[i],dat_w_border_distance$mid_long[i]))
  pts <- data.frame(t(sapply(list(dat_w_border_distance$mid_long[i],dat_w_border_distance$mid_lat[i]),c)))
spdf <- as_Spatial(STATE_2)
wrld_subset <-spdf
dist.mat <- geosphere::dist2Line(p = pts, line = wrld_subset)



pts.wit.dist <- cbind(pts, dist.mat)
DF <- as.data.frame(pts.wit.dist )
DF <- DF[c(1:5)]

pts.sp <- sp::SpatialPoints(coords      = pts[,c("X1","X2")], # order matters
                            proj4string = wrld_subset@proj4string)

####only keep those OUTSIDE borders
pts.spdf <-as(pts.sp,"SpatialPointsDataFrame")
pts.spdf <- pts.spdf[wrld_subset,]  #only those inside dataframe NEED to figure out how to do opposite!
DF_Inside <- as.data.frame(pts.spdf)
DF <- merge(DF,DF_Inside,by = c("X1","X2"),all = T)
DF$distance_from_closest_border <- ifelse(is.na(DF$dummy),DF$distance,0)

dat_w_border_distance$state_2_dist_from_border[i] <- DF$distance_from_closest_border[1]
dat_w_border_distance$state_2_closest_lon[i] <- DF$lon[1]
dat_w_border_distance$state_2_closest_lat[i] <- DF$lat[1]
dat_w_border_distance$state_2_cap_lon[i] <- STATE_2$caplong[1]
dat_w_border_distance$state_2_cap_lat[i] <- STATE_2$caplat[1]
}
#convert metters to KM
dat_w_border_distance$state_2_dist_from_border <- dat_w_border_distance$state_2_dist_from_border/1000/100



```


```{r eval=FALSE, include=FALSE}
#write.csv(dat_w_border_distance, "dat_w_border_distance.csv", row.names=FALSE)
#write.csv(dat2, "dat2.csv", row.names=FALSE)



```


```{r}
library(readr)
dat_w_border_distance <- read_csv("../Data/dat_w_border_distance_AS3.csv")

#dat2 <- read_csv("/Users/patrickhulme/Documents/GitHub/ParityPowerProximity/Code/dat2.csv")



```




```{r}
####compare COW coes, capital coordinates, etc.


dat_check <- merge(dat2,dat_w_border_distance, by = c("year","ccode1","ccode2","mid_lat","mid_long"))

dat_check <- dat_check[c("year","ccode1","Matched_COW_1","ccode2","Matched_COW_2","state_1_cap_lon","state_2_cap_lon","state_1_cap_lat","state_2_cap_lat","distance1","distance2","state_1_dist_from_border","state_2_dist_from_border","mid_long","mid_lat")]



cap_coord_1 <- as.matrix(data.frame(dat_check$state_1_cap_lon, dat_check$state_1_cap_lat))
cap_coord_2 <- as.matrix(data.frame(dat_check$state_2_cap_lon, dat_check$state_2_cap_lat))

 mid_coord <- as.matrix(data.frame(dat_check$mid_long, dat_check$mid_lat))
 
  #########Set minimum distance at 1.1, so can log distance if desired##########
  dat_check$state_1_dist_from_border <- ifelse(dat_check$state_1_dist_from_border < 1.1, 1.1, dat_check$state_1_dist_from_border)
  dat_check$state_2_dist_from_border <- ifelse(dat_check$state_2_dist_from_border < 1.1, 1.1, dat_check$state_2_dist_from_border)
  
  
    dat_check$distance1_cap <- distHaversine(mid_coord, cap_coord_1) / 1000 / 100
  dat_check$distance2_cap<- distHaversine(mid_coord, cap_coord_2) / 1000 / 100
  
  #########Set minimum distance at 1.1, so can log distance if desired##########
  dat_check$distance1_cap <- ifelse(dat_check$distance1_cap < 1.1, 1.1, dat_check$distance1_cap)
  dat_check$distance2_cap <- ifelse(dat_check$distance2_cap < 1.1, 1.1, dat_check$distance2_cap)

  

  
  plot(dat_check$distance1,dat_check$distance1_cap)
   plot(dat_check$distance2,dat_check$distance2_cap)
   
   plot(dat_check$distance1_cap,dat_check$state_1_dist_from_border)
plot(dat_check$distance2_cap,dat_check$state_2_dist_from_border)
```






```{r}

dat3 <- merge(dat2,dat_check,  by = c("year","ccode1","ccode2","mid_lat","mid_long"))



 dat3$cincdis1_cap <- dat3$cinc1 / dat3$distance1_cap
  dat3$cincdis2_cap <- dat3$cinc2 / dat3$distance2_cap


  dat3$effcaprat_cap <- dat3$cincdis1_cap / (dat3$cincdis1_cap + dat3$cincdis2_cap)


  
  dat3$cincdis1_border <- dat3$cinc1 / dat3$state_1_dist_from_border
  dat3$cincdis2_border  <- dat3$cinc2 / dat3$state_2_dist_from_border


  dat3$effcaprat_border <- dat3$cincdis1_border / (dat3$cincdis1_border + dat3$cincdis2_border)

  dat3$effcaprat_border  <- ifelse(dat3$effcaprat_border < .5,(1-dat3$effcaprat_border),dat3$effcaprat_border)
```


```{r}

Model <- c("model1.wg", "model2.wg","model3.wg","model4.wg","model5.wg","model6.wg","model11.wg", "model12.wg","model13.wg","model14.wg","model15.wg","model16.wg","model51.wg", "model52.wg","model53.wg","model54.wg","model55.wg","model56.wg")
Coeff <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
StanEr <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
Model_Coeff_StanEr  <- data.frame(Model, Coeff, StanEr)



#"gibler models"
model1.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$cincprop + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)




Summary_model1.wg <- summary(model1.wg)
Model_Coeff_StanEr$Coeff[1] <- Summary_model1.wg[9,1]
Model_Coeff_StanEr$StanEr[1] <- Summary_model1.wg[9,2]




model2.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$cincprop +  outlier + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model2.wg <- summary(model2.wg)
Model_Coeff_StanEr$Coeff[2] <- Summary_model2.wg[9,1]
Model_Coeff_StanEr$StanEr[2] <- Summary_model2.wg[9,2]



model3.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$cincprop +  small_island + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model3.wg <- summary(model3.wg)
Model_Coeff_StanEr$Coeff[3] <- Summary_model3.wg[9,1]
Model_Coeff_StanEr$StanEr[3] <- Summary_model3.wg[9,2]



model4.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$cincprop + westernhemi + europe + africa + mideast + asia + oceania + sameentry + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model4.wg <- summary(model4.wg)
Model_Coeff_StanEr$Coeff[4] <- Summary_model4.wg[9,1]
Model_Coeff_StanEr$StanEr[4] <- Summary_model4.wg[9,2]



model5.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$cincprop +  firstparity + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model5.wg <- summary(model5.wg)
Model_Coeff_StanEr$Coeff[5] <- Summary_model5.wg[9,1]
Model_Coeff_StanEr$StanEr[5] <- Summary_model5.wg[9,2]





model6.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$cincprop +  firstparity + riv1 + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model6.wg <- summary(model6.wg)
Model_Coeff_StanEr$Coeff[6] <- Summary_model6.wg[9,1]
Model_Coeff_StanEr$StanEr[6] <- Summary_model6.wg[9,2]






#"gibler models" adjusted for distance




model11.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$Eff_cinc_rat + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model11.wg <- summary(model11.wg)
Model_Coeff_StanEr$Coeff[7] <- Summary_model11.wg[9,1]
Model_Coeff_StanEr$StanEr[7] <- Summary_model11.wg[9,2]



model12.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$Eff_cinc_rat +  outlier + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model12.wg <- summary(model12.wg)
Model_Coeff_StanEr$Coeff[8] <- Summary_model12.wg[9,1]
Model_Coeff_StanEr$StanEr[8] <- Summary_model12.wg[9,2]





model13.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$Eff_cinc_rat +  small_island + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model13.wg <- summary(model13.wg)
Model_Coeff_StanEr$Coeff[9] <- Summary_model13.wg[9,1]
Model_Coeff_StanEr$StanEr[9] <- Summary_model13.wg[9,2]



model14.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$Eff_cinc_rat + westernhemi + europe + africa + mideast + asia + oceania + sameentry + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model14.wg <- summary(model14.wg)
Model_Coeff_StanEr$Coeff[10] <- Summary_model14.wg[9,1]
Model_Coeff_StanEr$StanEr[10] <- Summary_model14.wg[9,2]




model15.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$Eff_cinc_rat +  firstparity + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model15.wg <- summary(model15.wg)
Model_Coeff_StanEr$Coeff[11] <- Summary_model15.wg[9,1]
Model_Coeff_StanEr$StanEr[11] <- Summary_model15.wg[9,2]



model16.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$Eff_cinc_rat +  firstparity + riv1 + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model16.wg <- summary(model16.wg)
Model_Coeff_StanEr$Coeff[12] <- Summary_model16.wg[9,1]
Model_Coeff_StanEr$StanEr[12] <- Summary_model16.wg[9,2]





###iwth border distance

model51.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$effcaprat_border + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model51.wg <- summary(model51.wg)
Model_Coeff_StanEr$Coeff[13] <- Summary_model51.wg[9,1]
Model_Coeff_StanEr$StanEr[13] <- Summary_model51.wg[9,2]



model52.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$effcaprat_border +  outlier + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model52.wg <- summary(model52.wg)
Model_Coeff_StanEr$Coeff[14] <- Summary_model52.wg[9,1]
Model_Coeff_StanEr$StanEr[14] <- Summary_model52.wg[9,2]





model53.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$effcaprat_border +  small_island + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model53.wg <- summary(model53.wg)
Model_Coeff_StanEr$Coeff[15] <- Summary_model53.wg[9,1]
Model_Coeff_StanEr$StanEr[15] <- Summary_model53.wg[9,2]



model54.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$effcaprat_border + westernhemi + europe + africa + mideast + asia + oceania + sameentry + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model54.wg <- summary(model54.wg)
Model_Coeff_StanEr$Coeff[16] <- Summary_model54.wg[9,1]
Model_Coeff_StanEr$StanEr[16] <- Summary_model54.wg[9,2]




model55.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$effcaprat_border +  firstparity + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model55.wg <- summary(model55.wg)
Model_Coeff_StanEr$Coeff[17] <- Summary_model55.wg[9,1]
Model_Coeff_StanEr$StanEr[17] <- Summary_model55.wg[9,2]



model56.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat3$effcaprat_border +  firstparity + riv1 + mid_lat + mid_long, data=dat3, family = "binomial", cluster = dat3$dyad, weights = dat3$weight)
Summary_model56.wg <- summary(model56.wg)
Model_Coeff_StanEr$Coeff[18] <- Summary_model56.wg[9,1]
Model_Coeff_StanEr$StanEr[18] <- Summary_model56.wg[9,2]





texreg(list(model1.wg, model2.wg, model3.wg, model4.wg, model5.wg, model6.wg
            #, model17.wg
            ), file =  "../Paper/Tables/Table_A16.tex", table = FALSE)


texreg(list(model11.wg, model12.wg, model13.wg, model14.wg, model15.wg, model16.wg
            #, model17.wg
            ), file =  "../Paper/Tables/Table_A17.tex", table = FALSE)


texreg(list(model51.wg, model52.wg, model53.wg, model54.wg, model55.wg, model56.wg
            #, model17.wg
            ), file =  "../Paper/Tables/Table_A18.tex", table = FALSE)






Model_Coeff_StanEr$conf.low_95 <- Model_Coeff_StanEr$Coeff-1.96*Model_Coeff_StanEr$StanEr

Model_Coeff_StanEr$conf.high_95 <- Model_Coeff_StanEr$Coeff+1.96*Model_Coeff_StanEr$StanEr

Model_Coeff_StanEr$`Measure of Power` <- c("Nominal Ratio","Nominal Ratio","Nominal Ratio","Nominal Ratio","Nominal Ratio","Nominal Ratio","Proximate Ratio","Proximate Ratio","Proximate Ratio","Proximate Ratio","Proximate Ratio","Proximate Ratio","Proximate Ratio (Border)","Proximate Ratio (Border)","Proximate Ratio (Border)","Proximate Ratio (Border)","Proximate Ratio (Border)","Proximate Ratio (Border)")

Model_Coeff_StanEr$Model_Name <- c("Model 1","Model 2","Model 3 Nominal","Model 4 Nominal","Model 5 Nominal","Model 6 Nominal","Model 1 Effective"," Model 2 Effective","Model 3 Effective","Model 4 Effective","Model 5 Effective","Model 6 Effective","Model 1 Effective (Border)"," Model 2 Effective (Border)","Model 3 Effective (Border)","Model 4 Effective (Border)","Model 5 Effective (Border)","Model 6 Effective (Border)")

Model_Coeff_StanEr$coord <- c(-1,-2,-3,-4,-5,-6,-1.03,-2.03,-3.03,-4.03,-5.03,-6.03,-1.13,-2.13,-3.13,-4.13,-5.13,-6.13)





pdf(file = "../Code/Figure_A4.pdf",   # The directory you want to save the file in
   # width = 4, # The width of the plot in inches
   height = 2
    )

ggplot(Model_Coeff_StanEr, aes(x = coord, y = Coeff)) +
        geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
        geom_point(aes(x = coord, 
                    y = Coeff)) + 
        geom_linerange(aes(x = coord, 
                     ymin = conf.low_95,
                     ymax = conf.high_95, linetype = `Measure of Power`,color =`Measure of Power`),
                   lwd = 0.4)  + 
        ggtitle("Conflict Propensity") +
        coord_flip() + theme_bw()  + scale_x_continuous(name="Models", breaks=seq(-6,-1,1), labels = c("Model 6","Model 5","Model 4","Model 3", "Model 2","Model 1"),limits=c(-6.5, -0.5)) +
  theme(text=element_text(family="Times")) + ylab("Coefficient of Relative Power")
dev.off()


```






```{r}





####Figure A5{Online Appendix}
library(MASS)
library(boot)



dat2$joint_dem <- ifelse(is.na(dat2$joint_dem),0,dat2$joint_dem)
dat2$allied <- ifelse(is.na(dat2$allied),0,dat2$allied)
dat2 <- na.omit(dat2) 


func<- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat2$Eff_cinc_rat +  small_island, data=dat2, family = "binomial", cluster = dat2$dyad, weights = dat2$weight)

func2<- model3.wg <- glm.cluster(onsetdum ~ allied + jointdem + cwpceyrs_bkt + spline1 + spline2 + spline3 + landcontig + dat2$cincprop +  small_island, data=dat2, family = "binomial", cluster = dat2$dyad, weights = dat2$weight)

lwg.range <- seq(from=min(dat2$Eff_cinc_rat),to=max(dat2$Eff_cinc_rat),by=.01)


x.lo <- c(1, #intercept
          0, #Allied
          0,#Jointdem
          median(dat2$cwpceyrs_bkt),#peace years
          median(dat2$spline1), #spline1
          median(dat2$spline2), #spline2
          median(dat2$spline3), #spline3
          0, #landcontig
          0, #Eff_cinc_rat ***
          0 #small_island
          )
X.lo <- matrix(x.lo, nrow=length(x.lo), ncol=length(lwg.range))
#X.lo[6,] <- ((lwg.range*median(dat2$Eff_cinc_rat))/(1-lwg.range)) #replacing with different CINC values
X.lo[9,] <- lwg.range#replacing different ratios dependenet on these diff CINC values
#X.lo[9,] <- lwg.range^2 #now squared
B.tilde <- mvrnorm(1000, coef(func), vcov(func)) #1000 draws of coefficient vectors
s.lo <- inv.logit(B.tilde %*% X.lo) #matrix of predicted probabilities

s.lo<-apply(s.lo, 2, quantile, c(0.025, 0.5, .975))



t_col <- function(color, percent = 50, name = NULL) {
#   color = color name
# percent = % transparency
#    name = an optional name for the color
# Get RGB values for named color
  rgb.val <- col2rgb(color)
# Make new color using input color as base and alpha set by transparency
  t.col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3],
               max = 255,
               alpha = (100-percent)*255/100,
               names = name)
}









lwg.range <- seq(from=min(dat2$Eff_cinc_rat),to=max(dat2$Eff_cinc_rat),by=.01)


y.lo <- c(1, #intercept
          0, #Allied
          0,#Jointdem
          median(dat2$cwpceyrs_bkt),#peace years
          median(dat2$spline1), #spline1
          median(dat2$spline2), #spline2
          median(dat2$spline3), #spline3
          0, #landcontig
          0, #Eff_cinc_rat ***
          0 #small_island
          )
y.lo <- matrix(y.lo, nrow=length(y.lo), ncol=length(lwg.range))
#X.lo[6,] <- ((lwg.range*median(dat2$Eff_cinc_rat))/(1-lwg.range)) #replacing with different CINC values
y.lo[9,] <- lwg.range#replacing different ratios dependenet on these diff CINC values
#X.lo[9,] <- lwg.range^2 #now squared
B.tilde <- mvrnorm(1000, coef(func2), vcov(func2)) #1000 draws of coefficient vectors
s2.lo <- inv.logit(B.tilde %*% y.lo) #matrix of predicted probabilities

s2.lo<-apply(s2.lo, 2, quantile, c(0.025, 0.5, .975))



t_col <- function(color, percent = 50, name = NULL) {
#   color = color name
# percent = % transparency
#    name = an optional name for the color
# Get RGB values for named color
  rgb.val <- col2rgb(color)
# Make new color using input color as base and alpha set by transparency
  t.col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3],
               max = 255,
               alpha = (100-percent)*255/100,
               names = name)
}



pdf(file = "../Code/Figure_A5.pdf",   # The directory you want to save the file in
   # width = 4, # The width of the plot in inches
   height = 4
    )
op <- par(family = "Times")
mar.default <- c(5,4,4,2) + 0.1
par(mar = mar.default + c(0, 4, 0, 0)) 
plot(lwg.range, s2.lo[2,], ylim=c(0,.005), xlab = "Power Ratio",
     ylab = "",
     main = "Model 3", bty="n",
     col="white",las=1, family="Times")
title(ylab="Predicted Probability of MID", line=4, cex.lab=1, family="Times")
polygon(x=c(lwg.range, rev(lwg.range)),
        y=c(s2.lo[1,], rev(s2.lo[3,])),
        col=t_col(grey(0.8), percent=70), border=NA)
lines(lwg.range, s2.lo[2,], lwd=2, lty = 2)
polygon(x=c(lwg.range, rev(lwg.range)),
        y=c(s.lo[1,], rev(s.lo[3,])),
        col=t_col(grey(0.8), percent=70), border=NA)
lines(lwg.range, s.lo[2,], lwd=2)
legend("topright", title = "Measurement of Power", c("Nominal","Proximate"), lty= c(2,1))
par(op)
dev.off()




```











